1.How many customer each country have?

ggplot(customer_count_by_country, aes(x = Country, y = TotalCustomers)) +
  geom_bar(stat = "identity", fill = "black") +
  geom_text(position = position_stack(vjust = 0.5), size = 3, color = "white", aes(label = TotalCustomers)) +
  labs(title = "Total Number of Customers by Country",
       x = "Country",
       y = "Total Customers") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

2.Top Selling Countries

df_sales_info <- df_order_item|>
  left_join(df_orders|>
              select(OrderID,CustomerID),
            by= c("OrderID"="OrderID"))|>
  left_join(df_customers|>
              select(CustomerID,Country),
            by= c("CustomerID"="CustomerID"))|>
  mutate(Sales= UnitPrice*Quantity) |>
  group_by(Country)|>
  summarise(Total_sale=sum(Sales))

top_10_selling_country <- df_sales_info |>
  arrange(desc(Total_sale))|>
  head(10)|>
  select(Country,Total_sale)

bar_chart_ggplot <- ggplot(top_10_selling_country, aes(x = Country, y = Total_sale)) +
  geom_bar(stat = "identity", fill = "royalblue") +
  labs(title = "Top 10 Selling Countries",
       x = "Country",
       y = "Total Sales") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(labels = scales::comma, breaks = scales::pretty_breaks())

# Convert ggplot to plotly
bar_chart_plotly <- ggplotly(bar_chart_ggplot)
bar_chart_plotly

CATEGORIES

3. What is th total sales revenue for each catgory?

df_category_sales <- df_order_item |>
  left_join(df_product, by = "ProductID") |>
  
  left_join(df_categories, by="CategoryID")|>
  group_by(CategoryID, CategoryName) |>
  summarise(TotalSales = sum(UnitPrice.x * Quantity))

# Plot sales trends by category
bar_chart2_ggplot<-ggplot(df_category_sales, aes(x = CategoryName, y = TotalSales)) +
  geom_bar(stat = "identity", fill = "brown") +
  labs(title = "Total Sales by Category",
       x = "Category",
       y = "Total Sales") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  coord_flip()+
  scale_y_continuous(labels = scales::comma, breaks = scales::pretty_breaks())
bar_chart_plotly <- ggplotly(bar_chart2_ggplot)

bar_chart_plotly

FREIGHT

4.What is average,maximum and minimum freight cost?

freight_cost <- df_orders|>
  
  summarise(avgFreight= mean(Freight, na.rm= TRUE),
            maxFreight = max(Freight, na.rm= TRUE),
            minFreight = min(Freight, na.rm= TRUE))
df_orders$OrderDate <- ymd_hms(df_orders$OrderDate)
df_orders$Month <- format(df_orders$OrderDate, "%Y-%m")
df_orders$Year <- format(df_orders$OrderDate, "%Y")
print(freight_cost)
##   avgFreight maxFreight minFreight
## 1    78.2442    1007.64       0.02

5.Average Freight Cost Over Month

monthly_analysis <- df_orders |>
  group_by(Month) |>
  summarise(AvgFreight_Cost = mean(Freight, na.rm = TRUE))

line_chart_ggplot<-ggplot(monthly_analysis,aes(x=Month,y=AvgFreight_Cost ,group=1)) +
  geom_line(color="purple")+
  labs(title = "Freight Cost Over Month",
       x = "Month",
       y = "Average Freight Cost")+
  geom_point()
line_chart_plotly <- ggplotly(line_chart_ggplot)
line_chart_plotly

6.Average Freight Cost Over Year

yearly_analysis <- df_orders |>
  group_by(Year) |>
  summarise(AvgFreight_Cost = mean(Freight, na.rm = TRUE))

line_chart2_ggplot<-ggplot(yearly_analysis,aes(x=Year,y=AvgFreight_Cost ,group=1)) +
  geom_line(color="orange")+
  labs(title = "Freight Cost Over Month",
       x = "Year",
       y = "Average Freight Cost")+
  geom_point()
line_chart_plotly <- ggplotly(line_chart2_ggplot)
line_chart_plotly

7.Average Freight Cost Across Country

freight_country <- df_orders |>
  group_by(ShipCountry) |>
  summarise(AvgFreight_Cost = mean(Freight, na.rm = TRUE))
print(freight_country)
## # A tibble: 21 × 2
##    ShipCountry AvgFreight_Cost
##    <chr>                 <dbl>
##  1 Argentina              37.4
##  2 Austria               185. 
##  3 Belgium                67.4
##  4 Brazil                 58.8
##  5 Canada                 73.3
##  6 Denmark                77.6
##  7 Finland                41.4
##  8 France                 55.0
##  9 Germany                92.5
## 10 Ireland               145. 
## # ℹ 11 more rows
bar_chart <- ggplot(freight_country, aes(x = reorder(ShipCountry, AvgFreight_Cost), y = AvgFreight_Cost)) +
  geom_bar(stat = "identity", fill = "pink") +
  labs(title = "Average Freight Cost Across Countries",
       x = "Country",
       y = "Average Freight Cost") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

bar_chart_plotly <-ggplotly(bar_chart)
bar_chart_plotly

SHIPPING

8.Best Delivery Time Of Shipping

df_orders<-df_orders |>
  mutate(required_date= ymd_hms(RequiredDate),
         shipped_date=ymd_hms(ShippedDate),
         order_date= ymd_hms(OrderDate),
         islate= if_else(shipped_date > required_date, TRUE, FALSE)) 
#seen late shipping
df_late_shipments <- df_orders|>
  filter(islate) |>
  select(OrderID,order_date,shipped_date,required_date,islate) 

df_suppliers_delivery <- df_late_shipments |>
  left_join(df_order_item |>
              select(OrderID, ProductID),
            by = c("OrderID"= "OrderID")) |>
  left_join(df_product |>
              select(ProductID,ProductName, SupplierID),
            by =c("ProductID"="ProductID"))

# Step 3: Analyze the best time delivery of shipping
plotly::ggplotly(
  df_delivery_times <- df_suppliers_delivery |>
    
    left_join(df_suppliers |>
                select(SupplierID,CompanyName),
              by =c("SupplierID"="SupplierID"))|>
    group_by(SupplierID,CompanyName) |>
    summarize(AvgDeliveryTime = mean(shipped_date - order_date, na.rm = TRUE))|>
    ggplot(aes(y = AvgDeliveryTime, x = CompanyName)) +
    geom_bar(stat = "identity", fill = "skyblue") +
    labs(title = "Average Delivery Time by Supplier",
         x = "Company Name",
         y = "Average Delivery Time") +
    theme_minimal()+
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
)

Product

9.Top 10 Selling Products

df_total_sales <- df_order_item|>
  mutate(Sales= UnitPrice*Quantity)|>
  group_by(ProductID)|>
  summarise(Total_sale=sum(Sales))

df_sales_info <- df_total_sales|>
  left_join(df_product|>
              select(ProductID,ProductName,CategoryID),
            by= c("ProductID"="ProductID"))|>
  left_join(df_categories|>
              select(CategoryID,CategoryName),
            by= c("CategoryID"="CategoryID"))

top_10_selling_products <- df_sales_info |>
  arrange(desc(Total_sale))|>
  head(10)|>
  select(CategoryName,ProductName,Total_sale)
ggplot(top_10_selling_products, aes(x = ProductName, y = Total_sale)) +
  geom_bar(stat = "identity", fill = "royalblue") +
  labs(title = "Top 10 Selling Products",
       x = "Product",
       y = "Total Sales") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  scale_y_continuous(labels = scales::comma, breaks = scales::pretty_breaks())

10.Top 3 Supplier By the number of products

df_combined <- df_suppliers |>
  left_join(df_product, by = "SupplierID")

# Count the number of products supplied by each supplier
supplier_product_count <- df_combined|>
  group_by(SupplierID, CompanyName) |>
  summarise(NumProducts = n_distinct(ProductID)) |>
  arrange(desc(NumProducts))

# Select the top 3 suppliers
top_suppliers <- head(supplier_product_count, 3)

# Print the result
print(top_suppliers)
## # A tibble: 3 × 3
## # Groups:   SupplierID [3]
##   SupplierID CompanyName                     NumProducts
##        <int> <chr>                                 <int>
## 1          7 Pavlova, Ltd.                             5
## 2         12 Plutzer Lebensmittelgromrkte AG           5
## 3          2 New Orleans Cajun Delights                4

12.Proportion of Orders by City and Country